home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Shareware Grab Bag
/
Shareware Grab Bag.iso
/
090
/
cmln1285.arc
/
DADA.PAS
< prev
next >
Wrap
Pascal/Delphi Source File
|
1986-02-27
|
50KB
|
1,007 lines
{ PROGRAM: DADA.PAS
AUTHOR: Brian Hayes
DATE BEGUN: September 17, 1985
FOR COMPILATION BY: Turbo Pascal v 3.0
}
{ DESCRIPTION:
A compiler for a toy language called Dada, which adopts much
of the vocabulary of Pascal but lacks many of its features, most
notably functions, local variables, defined types and a facility
for passing parameters to procedures. The "object code" produced
by the compiler consists of Forth words. For additional detail see
the comments below and the accompanying file DADA.DOC.
This program is intended for demonstration purposes only. It
has been compiled and casually tested, but it is certainly not
guaranteed to be error-free. Testing was done with version 3.0
of Borland International's Turbo Pascal, but I have tried to avoid
features peculiar to Borland's implementation. Adaptation to other
Pascal compilers should be easy.
The compatibility of the object code with various Forth systems
is more difficult to assess. See the comments on the code gen-
erator and DADA.DOC Note 7.
}
{ COMPILER DIRECTIVES (Valid for Turbo Pascal only) }
{$B+} {B+ assigns StdIn/StdOut to CON, B- to TRM; default +}
{$C+} {C+ allows ^C and ^S during Read/ReadLn; default +}
{$I+} {I+ enables automatic I/O error checking; default +}
{$R-} {R+ enables run-time checking of index bounds; default -}
{$V-} {V+ requires string parameters to match declared length; default +}
{$U-} {U+ allows ^C interrupt at any time; default -}
{$D+} {D+ unbuffers I/O for devices; default +}
{$F16} {Fn sets maximum number of files open simultaneously; default 16}
{$K+} {K+ enables checking for stack-heap collision; default +}
program Dada;
{*****************************************************************************}
{***************** GLOBAL CONSTANTS *****************}
{*****************************************************************************}
const
MaxIdentLen = 31; { only the first 31 chars saved }
MaxKeyLen = 9; { longest keyword }
MaxErrorLen = 35; { longest error message }
{*****************************************************************************}
{***************** GLOBAL TYPES *****************}
{*****************************************************************************}
type
IdentStr = string[MaxIdentLen];
KeyStr = string[MaxKeyLen];
ErrorStr = string[MaxErrorLen];
ForthStr = string[64]; { for Forth output; see procedure Gen }
{ ErrCode identifies error messages in the array ErrorList;
see DADA.DOC Note 1. }
ErrCode = (Disk, QChar, XPgm, XIdent, XVar, XInt, XBool, XColon, XType, XSemi,
XBegin, XSemEnd, XThen, XDo, XAssgn, XStmt, DupDec, UnDec, Match,
XFactor, XParen, XDot, UnXEOF);
{ TokCode lists all symbols that can possibly be returned by the
scanner. Null is a placeholder that can appear in a few fields
of symbol-table entries. }
TokCode = (Null, Ident, Number, PgmSym, VarSym, ProcSym, BeginSym, EndSym,
IfSym, ThenSym, ElseSym, WhileSym, DoSym, IntSym, BoolSym, TrueSym,
FalseSym, EQ, GT, GE, NE, LE, LT, Plus, Minus, OrSym, Times, Divide,
AndSym, ModSym, NotSym, AssignOp, Colon, LeftParen, RightParen,
Semi, Dot, ReadSym, WriteSym);
TokenRec = record { Definition of the mailbox where the scanner }
Name : IdentStr; { leaves dope on the current token and where }
Code : TokCode; { the parser picks it up. }
end;
SymClass = (Variable, Proc); { Every symbol must be one or the other. }
SymPtr = ^Symbol; { Points to a symbol-table entry. }
{ Format of a symbol-table entry. See DADA.DOC Note 2 }
Symbol = record
Name : IdentStr; { UpCase string of name as read }
Class : SymClass; { either Variable or Proc }
VarType : TokCode; { either IntSym or BoolSym }
Scope : integer; { zero for global, then 1,2,3...}
Next : SymPtr; { pointer to next table entry }
end;
{ The output buffer represents a Forth "screen" of 16 lines
by 64 characters. }
OutBufLines = 1..16;
OutBufChars = 1..64;
OutBufArray = array[OutBufLines] of array[OutBufChars] of char;
{*****************************************************************************}
{***************** GLOBAL VARIABLES *****************}
{*****************************************************************************}
var
OutLine : OutBufLines; { Declared global becaused called by }
OutPoint : OutBufChars; { both InitOutBuf and Gen. }
OutBuf : OutBufArray;
InFile : Text; { source code }
OutFile : file of OutBufArray; { object code }
TK : TokenRec; { where dope on the current token is stashed }
CH : char; { current scanner input }
LineCount : integer; { number of lines in source text }
TypeSet : set of TokCode; { sets defined for convenience }
TFset : set of TokCode; { in the parsing logic }
RelOpSet : set of TokCode;
AddOpSet : set of TokCode;
MultOpSet : set of TokCode;
FirstSym : SymPtr; { link to the start of the symbol-table chain }
CurrentScope : integer; { nesting depth of procedures }
Keywords : array[TokCode] of KeyStr;
ErrorList : array[ErrCode] of ErrorStr;
{*****************************************************************************}
{*****************************************************************************}
{***************** *****************}
{***************** UTILITY ROUTINES *****************}
{***************** *****************}
{*****************************************************************************}
{*****************************************************************************}
{ The Keywords and ErrorList arrays must be initialized when the
program is started. So must the five small sets used to form
symbols into groups. }
procedure InitKeywords;
begin
Keywords[PgmSym] := 'PROGRAM';
Keywords[VarSym] := 'VAR';
Keywords[IntSym] := 'INTEGER';
Keywords[BoolSym] := 'BOOLEAN';
Keywords[BeginSym] := 'BEGIN';
Keywords[EndSym] := 'END';
Keywords[IfSym] := 'IF';
Keywords[ThenSym] := 'THEN';
Keywords[ElseSym] := 'ELSE';
Keywords[WhileSym] := 'WHILE';
Keywords[DoSym] := 'DO';
Keywords[NotSym] := 'NOT';
Keywords[OrSym] := 'OR';
Keywords[AndSym] := 'AND';
Keywords[ModSym] := 'MOD';
Keywords[ProcSym] := 'PROCEDURE';
Keywords[TrueSym] := 'TRUE';
Keywords[FalseSym] := 'FALSE';
Keywords[ReadSym] := 'READLN';
Keywords[WriteSym] := 'WRITELN';
end;
procedure InitErrorList;
begin
ErrorList[Disk ] := 'Trouble with file or disk.';
ErrorList[QChar ] := 'Unrecognized character in input.';
ErrorList[Xpgm ] := 'No program header.';
ErrorList[XIdent ] := 'Identifier expected.';
ErrorList[XVar ] := 'Variable expected.';
ErrorList[XInt ] := 'Integer value expected.';
ErrorList[XBool ] := 'Boolean value expected.';
ErrorList[XColon ] := 'Colon expected.';
ErrorList[XType ] := 'Type designator expected.';
ErrorList[XSemi ] := 'Semicolon expected.';
ErrorList[XBegin ] := '"Begin" expected.';
ErrorList[XSemEnd] := 'Semicolon or "end" expected.';
ErrorList[XThen ] := '"Then" expected.';
ErrorList[XDo ] := '"Do" expected.';
ErrorList[XAssgn ] := 'Assignment statement expected.';
ErrorList[XStmt ] := 'Statement expected.';
ErrorList[DupDec ] := 'Duplicate declaration.';
ErrorList[UnDec ] := 'Undeclared variable or procedure.';
ErrorList[Match ] := 'Type mismatch.';
ErrorList[XFactor] := 'Factor expected.';
ErrorList[XParen ] := 'Closing parenthesis expected.';
ErrorList[XDot ] := 'Period expected.';
ErrorList[UnXEOF ] := 'Unexpected end of file.';
end;
procedure InitSets;
begin
TypeSet := [IntSym, BoolSym];
TFset := [TrueSym, FalseSym];
RelOpSet := [EQ..LT];
AddOpSet := [Plus..OrSym];
MultOpSet := [Times..ModSym];
end;
{ The error-handling given here is minimal. Procedure Error is
handed a code and prints the corresponding string. The only
information supplied on what might have caused the error is
a line number. The program then halts. See DADA.DOC Note 3. }
procedure Error(Problem : ErrCode);
begin
WriteLn('ERROR IN LINE ',LineCount,': ',ErrorList[Problem]);
WriteLn; WriteLn('Compilation aborted.');
Halt;
end;
procedure SayHello;
begin
ClrScr;
WriteLn;
WriteLn;
WriteLn('DADA: A demonstration compiler');
WriteLn;
WriteLn('This program is described in Computer Language, December, 1985');
WriteLn;
WriteLn;
WriteLn;
end;
{ The file handling is as rudimentary as the error routine. Further-
more, the version given here depends on features peculiar to Turbo
Pascal. See DADA.DOC Note 4. }
procedure OpenFiles;
var
FileOK : boolean;
InFileName : string[14];
OutFileName : string[14];
begin
Write('Enter the name of the file to be compiled: ');
ReadLn(InFileName);
Assign(InFile, InFileName);
{$I-} Reset(InFile); {$I+}
FileOK := (IoResult = 0); if not FileOK then Error(Disk);
WriteLn;
Write('Enter the name of the output file: ');
ReadLn(OutFileName);
Assign(OutFile, OutFileName);
{$I-} ReWrite(OutFile); {$I+};
FileOK := (IoResult = 0); if not FileOK then Error(Disk);
WriteLn; WriteLn;
end;
procedure CloseFiles;
begin
Close(InFile);
Close(OutFile);
end;
{*****************************************************************************}
{*****************************************************************************}
{***************** *****************}
{***************** LEXICAL ANALYZER *****************}
{***************** *****************}
{*****************************************************************************}
{*****************************************************************************}
{ Procedure GetTK constitutes the scanner, or lexical analyzer. It
calls on GetCH to read the next character from the input stream
and uses Recognize to set up the two global-variable fields
TK.Code and TK.Name. The main routine first strips out all com-
ments and whitespace characters and then enters a state deter-
mined by the first character of the remaining input. Each state
corresponds to one clause of the case statement. Note that GetCH
converts all alphabetic characters to upper case, so that the
compiler in insensitive to case. Two errors can be issued by the
scanner. Error(QChar) is called if a character outside the recog
nized set appears in the input (except in comments). Error(UnXEOF)
is reported if the scanner encounters end-of-file. Since GetTK
is not called after the final period marking the end of a pro-
gram, the scanner should never read the end of the file.
NOTE: Each time the scanner is called, it goes to work on the
character already in variable CH, not on the next character
from the stream. When GetTK exits, CH holds the first char-
acter beyond the token returned. In other words, the file
pointer is pre-incremented.
See also DADA.DOC Note 5. }
procedure GetTK;
var
I : TokCode; { used in a FOR loop to check for keywords }
procedure GetCH;
begin
if Eof(InFile) then CH := #0 else Read(InFile, CH); { get next if possible}
CH := Upcase(CH); { make case immaterial}
if CH = #13 then LineCount := LineCount + 1; { count for Error }
end;
procedure Recognize(Tok: TokCode); { Called once for each character }
begin { scanned, adding it to the string }
TK.Code := Tok; { in TK.Name and recording the }
TK.Name := Concat(TK.Name,CH); { current analysis in TK.Code. Note}
GetCH; { that TK.Code is not actually }
end; { valid until GetTK returns. }
{ The first section of GetTK strips out comments and the whitespace
characters #9 (tab), #10 (line feed), #12 (form feed), #13 (carriage
return) and $32 (space). For comments any characters following a
left brace are ignored up to the first right brace. Note that this
means comments cannot be nested: Any number of opening braces will
be canceled by the first closing brace. The nested while loops are
needed because comments and whitespace can be interspersed in any
sequence. }
begin { GetTK }
while (CH in ['{',#9,#10,#12,#13,#32]) do { loop while comment, space }
begin
if CH = '{' then repeat GetCH until CH = '}'; { eat up the comment }
GetCH; { toss out the right brace }
while (CH in [#9,#10,#12,#13,#32]) do GetCH; { eat the whitespace }
end;
TK.Name := ''; { reset the identifier string to null }
case CH of { look at the current char from stream}
'A'..'Z' : begin { Ident or keyword}
while (CH in ['A'..'Z','0'..'9']) do { add chars to the}
Recognize(Ident); { TK.Name string }
for I := PgmSym to WriteSym do { An Ident unless }
if Keywords[I] = TK.Name then TK.Code := I; { listed here }
end;
'0'..'9' : while (CH in ['0'..'9']) do Recognize(Number); { numeric literal}
'>' : begin
Recognize(GT); { With two-symbol oper- }
if CH = '=' then Recognize(GE); { ators, start by assum-}
end; { ing the one-symbol }
'<' : begin { form and then revise }
Recognize(LT); { the verdict if the }
if CH = '>' then Recognize(NE) { second character is }
else if CH = '=' then Recognize(LE) { found. }
end;
':' : begin
Recognize(Colon);
if CH = '=' then Recognize(AssignOp);
end;
'=' : Recognize(EQ);
'+' : Recognize(Plus);
'-' : Recognize(Minus);
'*' : Recognize(Times);
'/' : Recognize(Divide);
'(' : Recognize(LeftParen);
')' : Recognize(RightParen);
';' : Recognize(Semi);
'.' : Recognize(Dot);
#0 : Error(UnXEOF); { Program has ended without a period }
else Error(QChar); { Queer character; can't digest it }
end;
end;
{*****************************************************************************}
{*****************************************************************************}
{***************** *****************}
{***************** SYMBOL TABLE *****************}
{***************** *****************}
{*****************************************************************************}
{*****************************************************************************}
{ The three routines Find, Declare and Blot manage the symbol
table. The table is organized as a linked list in which
FirstSym always points to the most recently added entry. The
Next field points to the next-youngest entry, so that
following the chain of Nexts ultimately leads to the first
entry, which is always the declaration of the program
header. Because all variables in Dada are global, the
symbol table has a fixed, predictable structure: the program
declaration is followed by variable declarations and then by
procedure declarations. See DADA.DOC Note 6. }
{ Find is passed an identifier string and returns either a
pointer to the corresponding symbol-table entry or nil if
the identifier does not exist. It traverses the chain of
entries beginning with FirstSym, and so the first matching
entry will be found. }
function Find(ID: IdentStr): SymPtr;
var
ThisSym : SymPtr;
begin
ThisSym := FirstSym; { start with the latest entry }
while ((ID<>ThisSym^.Name) and { loop if no match and... }
(ThisSym<>nil)) do { we're not at the end of list }
ThisSym := ThisSym^.Next; { get next record }
Find := ThisSym; { a match if there is one, or nil }
end;
{ Declare installs both variable names and procedure names in
the symbol table. ID is the name of the Identifier, as given
in TK.Name; CL is either "Proc" or "Variable"; Kind is "IntSym"
or "BoolSym" for variables, "Null" for procedures. }
procedure Declare(ID: IdentStr; CL: SymClass; Kind: TokCode);
var
ThisSym : SymPtr;
begin
ThisSym := Find(ID); { See if it already exists }
if ThisSym <> nil then Error(DupDec); { Call error & halt if it does }
New(ThisSym); { Create a new record }
ThisSym^.Next := FirstSym; { Swap pointers to put the... }
FirstSym := ThisSym; { ...new record first in list }
with FirstSym^ do
begin
Name := ID; { Plug in the values passed... }
Class := CL; { ...as arguments... }
VarType := Kind;
Scope := CurrentScope; { ...and a value from a global }
end;
end;
{ Blot is called when the "end" of a block is reached and removes
from the symbol table all names whose scope is confined to that
block. The global variable CurrentScope is initialized to zero
and incremented each time ParseBlock is called. Blot decrements
CurrentScope and unlinks from the symbol table any entry whose
Scope field is numerically greater than CurrentScope. }
procedure Blot;
var
TrashSym : SymPtr;
begin
CurrentScope := CurrentScope - 1; { back to scope of next outer block}
while FirstSym^.Scope > CurrentScope do { erase entries for closed block }
begin
TrashSym := FirstSym; { Give the pointer an alias }
FirstSym := FirstSym^.Next; { Unlink the record }
Dispose(TrashSym); { Free the allocated memory }
end;
end;
{*****************************************************************************}
{*****************************************************************************}
{***************** *****************}
{***************** CODE GENERATOR *****************}
{***************** *****************}
{*****************************************************************************}
{*****************************************************************************}
{ The code generator is simple to the point of triviality, largely
because the Forth virtual machine offers a very powerful assembly
language. All address calculations, for instance, are done by the
Forth interpreter. With a Forth system that accepts input as a
sequence of CR/LF-delimited lines, the code generator could be
reduced to a one-line procedure: WriteLn(OutFile,Forth). The
routines given here produce Forth "screens," or blocks of 1,024
bytes filled out with blanks (ASCII #32). For more on this for-
matting see DADA.DOC Note 7.
The main procedure of the code generator is Gen, which is called
by the various parsing routines; the argument is a string to be
written to the output file. The string is actually appended to
a buffer that holds 16 lines of 64 characters (the standard .SCR
format). When a line exceeds 62 characters, a new line is started;
when line 16 is reached, the continuation symbol "-->" is written
and the buffer is flushed to the disk and then reset to all blanks.
To make the generated code more readable, the symbol "|" is defined
as a control character that forces Gen to start a new line. The
parsing routines issue Gen('|') after each colon definition.
InitOutBuf sets the 1,024 bytes of the output buffer to the
ASCII blank character (#32) and resets the two array indices
OutLine and OutPoint to 1, which corresponds to the upper
left-hand corner of a Forth screen. The procedure is not made
local to Gen because it is called from the main initializing
routine at program startup. }
procedure InitOutBuf;
begin
for OutLine := 1 to 16 do
for OutPoint := 1 to 64 do OutBuf[Outline,OutPoint] := #32;
OutLine := 1; OutPoint := 1;
end;
procedure Gen(Forth : ForthStr);
var
FileOK : boolean;
I, TempPoint, TempLine : integer; { two temps for testing length }
{ WriteBuf, like OpenFiles, is written with a Turbo-specific
error-checking method. It simply writes the accumulated
buffer to the output file and, if there is no disk error,
calls InitOutBuf to reinitialize the array. }
procedure WriteBuf;
begin
{$I-} Write(OutFile,OutBuf); {$I+}
FileOK := (IoResult = 0); if not FileOK then Error(Disk);
InitOutBuf;
end;
{ NewLine resets the character counter and tests the line count;
if we are on line 15, the recursive call Gen('-->') flushes
the buffer and starts a new screen. }
procedure NewLine;
begin
OutPoint := 1; TempLine := OutLine + 1;
if TempLine >= 15 then Gen('-->') else OutLine := TempLine;
end;
begin { Gen }
if Forth = '|' then begin NewLine; exit; end; { force new line & leave }
TempPoint := OutPoint + Length(Forth); { Temp avoids out-of-range }
if TempPoint > 62 then NewLine; { 62 (not 64) to allow blanks}
for I := 1 to Length(Forth) do
begin
OutBuf[OutLine,OutPoint] := Forth[I]; { copy the string into buffer}
OutPoint := OutPoint + 1;
end;
OutPoint := OutPoint + 1; { allow one blank after code }
if ((Forth = '-->') or (Forth = ';S')) then WriteBuf;
end;
{ GenHeader creates a "run-time library" that precedes the object
code for all Dada programs. Some Forth systems may need additional
or different definitions here. The READ routine provides keyboard
input of signed integers. It could readily be improved. }
procedure GenHeader(PgmName : IdentStr);
begin
Gen('( Output of Dada compiler )'); Gen('|'); { Screen 0 comments }
Gen(Concat('( To execute type: 1 LOAD ',PgmName,' )')); Gen(';S');
Gen('FORTH DEFINITIONS DECIMAL'); Gen('|');
Gen('1 CONSTANT TRUE '); { }
Gen('0 CONSTANT FALSE'); { These synonyms will }
Gen(': NEGATE MINUS ;'); Gen('|'); { not be needed by all }
Gen(': NOT 0= ;'); { Forth systems; others }
Gen(': <> = NOT ;'); { may be required. }
Gen(': >= < NOT ;'); { }
Gen(': <= > NOT ;'); Gen('|'); { }
Gen(': READ KEY DUP 45 = IF TRUE SWAP EMIT KEY ELSE FALSE SWAP'); Gen('|');
Gen(' THEN 0 SWAP BEGIN DUP 13 = NOT WHILE DUP 48 < OVER'); Gen('|');
Gen(' 57 > OR IF DROP 7 EMIT ELSE DUP EMIT 48 - SWAP 10 * +'); Gen('|');
Gen(' THEN KEY REPEAT DROP SWAP IF NEGATE THEN SWAP ! ;'); Gen('|');
Gen(': WRITE @ . CR ;');
Gen('-->');
end;
{*****************************************************************************}
{*****************************************************************************}
{***************** *****************}
{***************** PARSER *****************}
{***************** *****************}
{*****************************************************************************}
{*****************************************************************************}
{ ParseProgram and the routines nested under it constitute the main
driver of DADA.PAS. The organization is outlined in DADA.DOC Note 8.
Each routine calls on GetTK (the scanner). Statements haveing to do
with parsing proper are interleaved with those for type checking
and code generation. }
procedure ParseProgram;
var
HoldID : IdentStr; { hangs onto the program name }
{ ParseVariables is called once by ParseProgram. If the current
token is not "var," there are no variables in the program and
the routine exits. Otherwise each declaration is checked for
proper form and a statement "0 VARIABLE IDENT" is generated to
allocate 16 bits of storage and record its address under the
name IDENT in the Forth dictionary. }
procedure ParseVariables;
var
HoldVar : IdentStr;
begin
if TK.Code = VarSym then { else no variables in entire program }
begin
GetTK; { eat the "var" token }
repeat { loop for arbitrary number of variables }
if TK.Code <> Ident then Error(XIdent); { format is "Ident: Type;" }
HoldVar := TK.Name; GetTK; { hang onto identifier }
if TK.Code <> Colon then Error(XColon); GetTK;
if not (TK.Code in TypeSet) then Error(XType); { TypeSet=IntSym,BoolSym }
Declare(HoldVar,Variable,TK.Code); GetTK; { install in symbol table}
Gen(Concat('0 VARIABLE ',HoldVar)); Gen('|'); { gen code & new line }
if TK.Code <> Semi then Error(XSemi); GetTK; { every decl. must have }
until (TK.Code in [ProcSym,BeginSym]); { no more variables }
end;
end;
procedure ParseBlock(Caller: IdentStr); { "Caller" will be the Ident gen-}
var { erated when "begin" is reached.}
HoldID : IdentStr; { HoldID passed as Caller to }
{ next nested block. }
procedure ParseStatement;
var
IdentPtr : SymPtr; { used to check symbol table }
HoldID : IdentStr; { hold while class & type checked }
HoldType : TokCode; { hold while exp. type is checked }
{ All the routines from ParseExpression on down are defined as
functions rather than procedures. They return the type (integer
or boolean) deduced from the operations specified. The "HoldOp"
variables are needed to delay code generation for postfix notation.
The "HoldType" variables record the type of the first operand so
that it can be compared with the type of the second operand. }
function ParseExpression: TokCode;
var
HoldRelOp : IdentStr;
HoldType : TokCode;
function ParseSimpleExpr: TokCode;
var
HoldAddOp : IdentStr;
HoldType : TokCode;
function ParseTerm: TokCode;
var
HoldMultOp : IdentStr;
HoldType : TokCode;
function ParseSignedFactor: TokCode;
var
IdentPtr : SymPtr;
HoldType : TokCode;
{ ParseFactor is the lowest-level routine in the parser. For a factor
to be recognized as valid it must be either a boolean literal (TRUE
of FALSE), a numeric literal, an identifier that designates a var-
iable or a parenthesized expression. The case statement considers
each of these possibilities in turn. }
function ParseFactor: TokCode;
var
IdentPtr: SymPtr; { needed to consult the symbol table }
begin
case TK.Code of
TrueSym,
FalseSym : begin
ParseFactor := BoolSym; { return type boolean }
Gen(TK.Name); GetTK; { Gen TRUE or FLASE }
end;
Number : begin
ParseFactor := IntSym; { return type integer }
Gen(TK.Name); GetTK; { Gen numeric literal }
end;
Ident : begin
IdentPtr := Find(TK.Name); { look up the name }
if IdentPtr = nil then Error(UnDec) { not found? }
else begin
if IdentPtr^.Class <> Variable { can't be proc }
then Error(XVar)
else begin
ParseFactor := IdentPtr^.VarType; { rtn Int or Bool }
Gen(ConCat(TK.Name,' @')); GetTK; { code to fetch }
end;
end;
end;
LeftParen : begin { call ParseExpression recursively }
GetTK; { and return the type }
ParseFactor := ParseExpression; { that it returns }
if TK.Code <> RightParen then Error(XParen);
GetTK; { eat the ")" }
end;
else Error(XFactor); { if none of above, not a valid factor }
end;
end;
{ ParseSignedFactor is introduced into the chain of expression-
parsing functions merely to handle a unary plus, minus or logical
NOT preceding a factor. If none of these is found, the code drops
through directly to ParseFactor. If one of them is found, the
appropriate code is generated after ParseFactor returns, thereby
converting the notation to postfix form. }
begin {ParseSignedFactor}
case TK.Code of
Plus : begin
GetTK; { eat the + sign }
HoldType := ParseFactor; { parse & get type }
if HoldType <> IntSym
then Error(XInt) { +boolean illegal }
else ParseSignedFactor := IntSym; { HoldType=Int }
end;
Minus : begin
GetTK; { eat the - sign }
HoldType := ParseFactor; { parse & get type }
if HoldType <> IntSym
then Error(XInt) { -boolean illegal }
else begin
ParseSignedFactor := IntSym; { HoldType = Int }
Gen('NEGATE'); { code toggles sign}
end;
end;
NotSym : begin
GetTK; { eat NOT symbol }
HoldType := ParseFactor; { parse & get type }
if HoldType <> BoolSym
then Error(XBool) { NOT number illegal}
else begin
ParseSignedFactor := BoolSym; { HoldType = boolean}
Gen('NOT'); { code to invert }
end;
end;
else ParseSignedFactor := ParseFactor; { no +, -, NOT found}
end;
end;
{ ParseTerm recognizes either "SignedFactor" or a subexpression of
the form "SignedFactor MultOp Term". Thus it will always call
ParseSignedFactor, and if the next token is a MultOp, it will
also call itself recursively. }
{ For a lacuna in type-checking, see DADA.DOC Note 9. }
begin {ParseTerm}
HoldType := ParseSignedFactor; { parse & get type first operand }
if (TK.Code in MultOpSet) then { TK = *, /, OR? }
begin
HoldMultOp := TK.Name; { save the Op for postfix }
GetTK; { and eat it }
if not (HoldType = ParseTerm) { parse & get type 2d operand }
then Error(Match); { 1st & 2d operands same type? }
Gen(HoldMultOp); { issue the saved operator }
end;
ParseTerm := HoldType; { return the operand type }
end;
{ ParseSimpleExpr recognizes either "Term" or a subexpression of
the form "Term AddOp SimpleExpr". It always calls ParseTerm
and if the next token is an AddOp, it also calls itself. }
begin {ParseSimpleExpr}
HoldType := ParseTerm; { parse & get type 1st operand }
if (TK.Code in AddOpSet) then { TK = +, -, AND? }
begin
HoldAddOp := TK.Name; { save the Op for postfix }
GetTK; { and eat it }
if not (HoldType = ParseSimpleExpr) { parse & get type 2d operand }
then Error(Match); { 1st & 2d operands same type? }
Gen(HoldAddOp); { issue the save operator }
end;
ParseSimpleExpr := HoldType; { return the operand type }
end;
{ ParseExpression recognizes either "SimpleExpr" or a sub-
expression of the form "SimpleExpr RelOp SimpleExpr." It always
calls ParseSimpleExpr once, and if the next token is a RelOp, it
also makes a second call to ParseSimpleExpr. Note that this scheme
is slightly different from the recursive pattern in the lower-
level functions. On that model one would expect "SimpleExpr RelOp
Expression," so that to parse the second operand the function would
call itself. Such a construction, however, would allow expressions
of the form A > B < C = D, and so on. It would be easy enough to
assign a meaning to these expressions, but the language definition
does not supply one. }
begin {ParseExpression}
HoldType := ParseSimpleExpr; { parse & get type 1st operand }
ParseExpression := HoldType; { type to be returned if no RelOp }
if (TK.Code in RelOpSet) then { TK is >, <, =, etc. ? }
begin
HoldRelOp := TK.Name; { save operator for postfix }
GetTK; { and eat it }
if not (HoldType = ParseSimpleExpr) { parse & get type 2d operand }
then Error(Match); { 1st & 2d operands same type ? }
ParseExpression := BoolSym; { if Expr has Relop, type = bool }
Gen(HoldRelOp); { issue the saved operator }
end;
end;
{ ParseStatement is the most elaborate routine in the parser. The
grammar for Dada specifies five constructs to be recognized as
valid statements: a compound statement delimited by "begin" and
"end," an assignment statement, a procedure call, an "if" state-
ment and a "while" statement. The parser actually includes two
more possibilities: "Read" and "Write" statements, which can be
viewed as predefined procedures. With one exception the grammar
allows these possibilities to be distinguished on the basis of
the first token presented to ParseStatement. The exception is
the discrimination between assignment statements and procedure
calls, which both begin with an identifier. The parser chooses
its path by checking the identifier's class in the symbol table:
a value can be assigned only to a variable, and only a procedure
can be called. }
{ See also DADA.DOC Note 10 }
begin {ParseStatement}
case TK.Code of
BeginSym : begin { must be compound }
GetTK; { eat the "BEGIN" }
while TK.Code <> EndSym do { loop while stmts }
begin
ParseStatement; { calls itself }
if not (TK.Code in [Semi,EndSym]) { delimiter expected }
then Error(XSemEnd);
if TK.Code = Semi then GetTK; { go back for another}
end;
GetTK; { TK must be "END"; eat it }
end;
IfSym : begin { must be If statement}
GetTK; { eat the "IF" }
if not (BoolSym = ParseExpression) { parse expr & ck type}
then Error(XBool); { only boolean allowed}
Gen('IF'); { Forth IF after expr }
if TK.Code <> ThenSym { must have then part }
then Error(XThen); GetTK; { if present, eat it }
ParseStatement; { calls itself }
if TK.Code = ElseSym then { else is optional }
begin { if present, Gen code}
Gen('ELSE'); GetTK; { and eat the token }
ParseStatement; { calls itself again }
end;
Gen('THEN'); { end of Forth cond. }
end;
WhileSym : begin { this is a while loop}
Gen('BEGIN'); GetTK; { Gen marker; eat tok }
if not (BoolSym = ParseExpression) { parse and check type}
then Error(XBool); { must be boolean }
if TK.Code <> DoSym then Error(XDo); { must have Do part }
Gen('WHILE'); GetTK; { eat; Gen Forth test }
ParseStatement; { recursive call }
Gen('REPEAT'); { end of Forth block }
end;
Ident : begin { assignment or call }
IdentPtr := Find(TK.Name); { look up in table }
if IdentPtr = nil then Error(UnDec); { can't find it }
if IdentPtr^.Class = Variable then { must be assignment }
begin
HoldType := IdentPtr^.VarType; { save Ident type... }
HoldID := TK.Name; GetTK; { and name for postfix}
if TK.Code <> AssignOp { must have := sign }
then Error(XAssgn); GetTK; { if so, eat it }
if not (HoldType = ParseExpression) { parse expr & ck type}
then Error(Match); { report mismatch }
Gen(Concat(HoldID,' !')); { code to store value }
end
else { must be proc call }
begin { invoke the Forth }
Gen(TK.Name); GetTK; { word and consume }
end; { the token }
end;
ReadSym : begin { predefined READ proc }
GetTK; { eat token }
if TK.Code <> Ident { must name variable... }
then Error(XIdent); { to hold the value read }
IdentPtr := Find(TK.Name); { look up in table }
if IdentPtr^.Class <> Variable { cannot be proc Ident }
then Error(XVar);
if IdentPtr^.VarType <> IntSym { only integers can... }
then Error(XInt); { be read in Dada }
Gen(Concat(TK.Name,' READ')); { issue the call in Forth}
GetTK; { eat up the Ident }
end;
WriteSym : begin { predefined WRITE proc }
GetTK; { eat token }
if TK.Code <> Ident { must name variable... }
then Error(XIdent); { to be written }
IdentPtr := Find(TK.Name); { look it up }
if IdentPtr^.Class <> Variable { cannot be Proc name }
then Error(XVar);
if IdentPtr^.VarType <> IntSym { only integers can... }
then Error(XInt); { be written }
Gen(Concat(TK.Name,' WRITE')); { issue the call }
GetTK; { consume the Ident }
end;
else Error(XStmt); { if none of the above }
end;
end;
{ ParseBlock has two parts. It first checks for a procedure declar-
ation; if it finds one, it parses the header and calls itself again.
Ultimately, the BEGIN symbol that marks the statement part of a block
must be reached. Each statement is then processed in turn (by Parse-
Statement) until the matching END is reached. The possible nesting
of blocks within blocks is accommodated automatically by the re-
cursive organization of the routines. Recall that ParseBlock is passed
an identifier as an argument, namely the Ident of the procedure or
program that issued the call. This Ident is written into the code as
the designator of a Forth word when "begin" is reached. }
begin { ParseBlock }
CurrentScope := CurrentScope + 1; { bump up nesting count }
while TK.Code = ProcSym do { proc declarations }
begin
GetTK; { eat "procedure" token }
if TK.Code <> Ident then Error(XIdent); { proc must have name }
HoldID := TK.Name; { save to pass to next level }
Declare(TK.Name,Proc,Null); { put in table as proc name }
GetTK; { eat the Ident }
if TK.Code <> Semi then Error(XSemi); { must have a semi }
GetTK; { throw the semi away }
ParseBlock(HoldID); { call again, pass proc name }
if TK.Code <> Semi then Error(XSemi); { proc block must have semi }
GetTK; { eat it up }
end;
if TK.Code <> BeginSym then Error(XBegin); { block begins "BEGIN" }
Gen(Concat(': ',Caller)); { start colon definition }
GetTK; { eat the "BEGIN" }
while TK.Code <> EndSym do { loop for all statements }
begin
ParseStatement; { call for each stmt }
if not (TK.Code in [Semi,EndSym]) { separator or terminator... }
then Error(XSemEnd); { need after each one }
if TK.Code = Semi then GetTK; { if semi, eat & go back }
end; { TK must have been "END" }
GetTK; { eat the END }
Gen(';'); Gen('|'); { end Forth def, force CR }
Blot; { clean up symbol table }
end;
{ ParseProgram sets the entire compiler in motion. It first handles
the program header, saving the program name (which will be the
last Forth word generated). The program is declared in the symbol
table as a procedure like any other, except that its scope field
has a value of zero, which no other procedure can have. ParseProgram
then calls ParseVariables and ParseBlock, which process the body
of the program. Finally there is a check for the final dot. }
begin { ParseProgram }
if TK.Code <> PgmSym then Error(XPgm); { must begin "PROGRAM" }
GetTK; { dispose of that token }
if TK.Code <> Ident then Error(XIdent); { program must have a name }
HoldID := TK.Name; { save, pass to ParseBlock }
Declare(TK.Name,Proc,Null); { install in table }
GenHeader(TK.Name); { output the Forth prelude }
GetTK; { eat the Ident }
if TK.Code <> Semi then Error(XSemi); { header must end with semi }
GetTK; { toss out the semi }
ParseVariables; { do the global declarations }
ParseBlock(HoldID); { give Block the program name }
if TK.Code <> Dot then Error(XDot); { not done until "." read }
Gen(';S'); { tell Forth to stop }
end;
{*****************************************************************************}
{*****************************************************************************}
{***************** *****************}
{***************** MAIN BLOCK *****************}
{***************** *****************}
{*****************************************************************************}
{*****************************************************************************}
{ The main driver routine has little to do: initialize some global
variables, open the files and crank up the parser. }
procedure Initialize;
begin
InitErrorList; { fill up one static array... }
InitKeywords; { and then another }
InitSets; { define sets of tokens }
InitOutBuf; { set up a clean slate }
FirstSym := nil; { make pointer point nowhere }
CurrentScope := 0; { at start scope is global }
LineCount := 1; { start on first source line }
SayHello; { paint the screen }
end;
begin { main block }
Initialize;
OpenFiles;
Read(InFile,CH); CH := Upcase(CH); { get first char for scanner }
GetTK; { and first token for parser }
ParseProgram;
CloseFiles;
WriteLn('Compilation complete.');
end.